Pre-processing of the data

ls_preprocessed <- preprocess_rna(path_rnaseq = 'rnaseq.RData', correct_batch = T, correct_gender = F)

Exploring data

Batch effect correction

print(ls_preprocessed$pbatch_bf)

print(ls_preprocessed$pgender_bf)

print(ls_preprocessed$pbatch_af)

print(ls_preprocessed$pgender_af)

DE analysis

DE_res <- DE_analysis(ls_preprocessed, 
           GeneBased=TRUE, 
           pDataBased=FALSE,
           NewCondition=FALSE,
           cond_nm='ENSG00000151012.9',
           reference = 'low', # low, alive
           correct_gender=FALSE,
           extremes_only=TRUE)
## Unlist done
## Labeling done
## Filtering done
## factor levels were dropped which had no samples
## Design done
## Warning: Column `gene`/`ENSEMBL` joining character vector and factor, coercing
## into character vector
## vsd symbols done
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 2599 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## DESeq done
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## Warning: Column `gene`/`ENSEMBL` joining character vector and factor, coercing
## into character vector
## res symbols done
## list done

DE results

heatmap_200(DE_res$res_df, DE_res$vsd_mat_sym, DE_res$meta_data, DE_res$pData_rnaseq)

x <- DE_res$res_df %>%
  arrange(desc(abs(log2FoldChange)))
rownames(x) <- make.names(x$symbol, unique = T)
k <- c('ENSG00000250033.1', 'ENSG00000151012.9')
x <- x[-which(x$gene %in%k),]
head(x, 10)
##                  baseMean log2FoldChange     lfcSE       stat       pvalue
## NSG2             5.380060       4.282002 0.3409770  2.5065713 1.219085e-02
## CRP              1.342740       4.200554 0.3599800 -0.3319593 7.399200e-01
## SH2D6           39.512000       3.878516 0.2606023  2.6554344 7.920634e-03
## TRPM5           71.839820       3.814712 0.3230996  1.9415596 5.219044e-02
## PRSS33           4.311685       3.603933 0.3644315 -0.2470052 8.049042e-01
## ALDH3A1       1026.048613       3.423719 0.3799263  7.7797091 7.269149e-15
## RP11.443P15.2  381.152595       3.276628 0.3549036  9.3641811 7.664230e-21
## AKR1C1        6047.395226       3.244793 0.3592801  9.2483463 2.279925e-20
## S100P         1044.120574       3.231819 0.3723356  8.9688111 2.997321e-19
## CYP4F3         224.272413       3.161299 0.3569107  8.9171491 4.784453e-19
##                       padj               gene        symbol
## NSG2          1.502153e-01  ENSG00000170091.6          NSG2
## CRP                     NA  ENSG00000132693.8           CRP
## SH2D6         1.186804e-01 ENSG00000152292.12         SH2D6
## TRPM5         3.115785e-01  ENSG00000070985.9         TRPM5
## PRSS33        9.325117e-01  ENSG00000103355.8        PRSS33
## ALDH3A1       2.004943e-11 ENSG00000108602.13       ALDH3A1
## RP11.443P15.2 9.160287e-17  ENSG00000171658.4 RP11-443P15.2
## AKR1C1        2.043725e-16  ENSG00000187134.8        AKR1C1
## S100P         1.791199e-15  ENSG00000163993.6         S100P
## CYP4F3        2.450734e-15 ENSG00000186529.10        CYP4F3
volcano_plot(x, gene=NULL, p_title='SLC7A11')

Pathway enrichment analysis fGSEA

Low SLC7A11 is the reference. When SLC7A11 is high, pathways shown below are up- or down- regulated

fgsea_res <- fgsea_analysis(DE_res)
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgsea_plot(fgsea_res$res_hm, pathways_title='Hallmark', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c1, pathways_title='C1 positional genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c2, pathways_title='C2 curated genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c3, pathways_title='C3 regulatory target genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c4, pathways_title='C4 cancer', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c5, pathways_title='C5 GO genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c6, pathways_title='C6 oncogenic', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c7, pathways_title='C7 immunologic', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_msg, pathways_title='All signatures', condition_name='SLC7A11 low vs high')